import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import scipy.stats as stats

# Load the first workbook, discard its "质量等级" column, and give the
# Chinese-labelled date columns the English labels Year / Month / Day.
data1 = (
    pd.read_excel("data/data1.xlsx")
    .drop(columns=["质量等级"])
    .rename(columns={'年': 'Year', '月': 'Month', '日': 'Day'})
)

# Load the second workbook and discard four of its V-coded columns.
data2 = pd.read_excel("data/data2.xlsx").drop(
    columns=["V01301", "V04001", "V04002", "V04003"]
)

# Put the two tables side by side (column-wise, aligned on the row index),
# print a summary of the result, and save it. The row index is written out
# as the first, unnamed CSV column.
data = pd.concat([data1, data2], axis=1)
data.info()
data.to_csv("data/data.csv")


# Reload the merged table from disk and print a summary of its columns.
# NOTE(review): data.csv is written with its row index and read back here
# without index_col, so that index should show up as an extra "Unnamed: 0"
# column in df — confirm whether downstream code expects it.
df = pd.read_csv("data/data.csv")
df.info()

# Fill missing values by interpolation (pandas' default settings), one
# column at a time.
# NOTE(review): 'PM2.5' is not in this list — confirm that is intended.
interpolated_columns = [
    'O3',
    'AQI',
    'SO2',
    'NO2',
    'CO',
    'V13305',
    'V10004_700',
    'V11291_700',
    'V12001_700',
    'V13003_700',
]
for column in interpolated_columns:
    df[column] = df[column].interpolate()

# Replace the V-coded column labels with descriptive names.
readable_names = {
    "V13305": "precipitation",
    "V10004_700": "air_pressure",
    "V11291_700": "wind_speed",
    "V12001_700": "temperature",
    "V13003_700": "humidity",
}
df = df.rename(columns=readable_names)

# Pull out the 'PM2.5' and 'AQI' columns (kept as target_PM / target_AQI)
# and keep every remaining column as `data`.
target_PM, target_AQI = df['PM2.5'], df['AQI']
data = df.drop(columns=['PM2.5', 'AQI'])

# Save each piece to its own CSV file, in the order PM, AQI, factor.
csv_outputs = {
    "data/PM.csv": target_PM,
    "data/AQI.csv": target_AQI,
    "data/factor.csv": data,
}
for csv_path, table in csv_outputs.items():
    table.to_csv(csv_path)
